{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import phoenix as px\n",
    "\n",
    "# Phoenix client handle; a later cell passes it to get_retrieved_documents.\n",
    "pxc = px.Client()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from phoenix.session.evaluation import get_retrieved_documents\n",
    "\n",
    "# Fetch the retrieved documents recorded for the \"research_assistant\" project; this frame is\n",
    "# the input that llm_classify evaluates further down.\n",
    "retrieved_documents = get_retrieved_documents(pxc, project_name=\"research_assistant\")\n",
    "retrieved_documents.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "\n",
    "from phoenix.evals import (\n",
    "    OpenAIModel,\n",
    "    llm_classify,\n",
    ")\n",
    "\n",
    "# Patch asyncio to allow nested event loops (presumably required because the notebook\n",
    "# kernel already runs one while llm_classify executes; confirm).\n",
    "nest_asyncio.apply()\n",
    "\n",
    "# Evaluator LLM used by llm_classify below; temperature is pinned to 0.0.\n",
    "model = OpenAIModel(\n",
    "    model=\"gpt-4o\",\n",
    "    temperature=0.0,\n",
    ")\n",
    "\n",
    "# Prompt template for the bias evaluation. {input} and {reference} are placeholders,\n",
    "# presumably filled by llm_classify from same-named dataframe columns (confirm).\n",
    "# The labels in the example response must match the rails given to llm_classify and the\n",
    "# keys of the score mapping, hence \"biased\" (not \"bias\").\n",
    "bias_detection_prompt = \"\"\"\n",
    "You are an AI assistant designed to detect bias in documents. Given an original question\n",
    "and a document, your task is to analyze the document and determine if there is any bias present.\n",
    "\n",
    "Please follow these steps:\n",
    "1. Read the original question carefully.\n",
    "2. Read the document provided.\n",
    "3. Identify any statements, language, or information in the document that may indicate bias.\n",
    "4. Provide a classification of the document's bias level:\n",
    "Unbiased, Somewhat Unbiased, Somewhat Biased, Biased.\n",
    "5. Provide a detailed explanation for your classification, citing specific parts of the\n",
    "document that influenced your decision.\n",
    "\n",
    "Original Question: {input}\n",
    "Document: {reference}\n",
    "\n",
    "Your analysis should be thorough and objective. Please ensure that your explanation\n",
    "is clear and concise.\n",
    "\n",
    "Example response:\n",
    "************\n",
    "EXPLANATION: An explanation of your reasoning for the label you chose\n",
    "LABEL: \"biased\", \"unbiased\", \"somewhat biased\", \"somewhat unbiased\"\n",
    "************\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Classify every retrieved document with the bias prompt and attach a numeric score\n",
    "# derived from the returned label.\n",
    "bias_classifications = llm_classify(\n",
    "    dataframe=retrieved_documents,\n",
    "    template=bias_detection_prompt,\n",
    "    model=model,\n",
    "    rails=[\n",
    "        \"Unbiased\",\n",
    "        \"Biased\",\n",
    "        \"Somewhat Biased\",\n",
    "        \"Somewhat Unbiased\",\n",
    "    ],\n",
    "    provide_explanation=True,\n",
    ").assign(\n",
    "    # Labels that are not keys of this mapping end up with a NaN score.\n",
    "    # NOTE(review): the keys are lowercase while the rails are capitalised; this relies on\n",
    "    # llm_classify returning lowercase labels (confirm).\n",
    "    score=lambda classified: classified[\"label\"].map(\n",
    "        {\n",
    "            \"unbiased\": 1,\n",
    "            \"somewhat unbiased\": 0.75,\n",
    "            \"somewhat biased\": 0.5,\n",
    "            \"biased\": 0,\n",
    "        }\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the per-document results, including the score column added in the previous cell.\n",
    "bias_classifications.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine all rows with the same context.span_id into one row: scores are averaged,\n",
    "# explanations are concatenated, and the label is the one closest to the average score.\n",
    "score_to_label = {1: \"unbiased\", 0.75: \"somewhat unbiased\", 0.5: \"somewhat biased\", 0: \"biased\"}\n",
    "\n",
    "\n",
    "def nearest_bias_label(average_score):\n",
    "    \"\"\"Return the label whose score lies closest to `average_score`.\"\"\"\n",
    "    closest_score = min(score_to_label, key=lambda score: abs(score - average_score))\n",
    "    return score_to_label[closest_score]\n",
    "\n",
    "\n",
    "# Missing explanations are dropped before joining so that a row without an explanation\n",
    "# cannot raise a TypeError inside str.join.\n",
    "span_aggregations = {\n",
    "    \"explanation\": lambda values: \"\\n----\\n\".join(values.dropna().astype(str)),\n",
    "    \"exceptions\": \"first\",\n",
    "    \"execution_status\": \"first\",\n",
    "    \"execution_seconds\": \"mean\",\n",
    "    \"score\": \"mean\",\n",
    "}\n",
    "# Skip columns that are absent from bias_classifications instead of failing with a KeyError.\n",
    "# NOTE(review): which bookkeeping columns llm_classify returns is not visible here; confirm.\n",
    "span_aggregations = {\n",
    "    column: aggregation\n",
    "    for column, aggregation in span_aggregations.items()\n",
    "    if column in bias_classifications.columns\n",
    "}\n",
    "\n",
    "# Grouping by context.span_id already makes it the index of the result.\n",
    "span_bias_classifications = bias_classifications.groupby(\"context.span_id\").agg(\n",
    "    span_aggregations\n",
    ")\n",
    "span_bias_classifications[\"average_score\"] = span_bias_classifications[\"score\"]\n",
    "\n",
    "# na_action=\"ignore\" leaves the label missing for spans without any scored document;\n",
    "# min() over a NaN average would otherwise silently return the first key (\"unbiased\").\n",
    "span_bias_classifications.insert(\n",
    "    0,\n",
    "    \"label\",\n",
    "    span_bias_classifications[\"average_score\"].map(nearest_bias_label, na_action=\"ignore\"),\n",
    ")\n",
    "span_bias_classifications.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from phoenix.trace import DocumentEvaluations, SpanEvaluations\n",
    "\n",
    "# Log the span-level roll-up and the per-document results. Both frames hold bias\n",
    "# classifications, so both are logged under the bias evaluation name (the document-level\n",
    "# frame was previously mislabelled as \"Relevance\"). The client created in the first cell\n",
    "# is reused instead of constructing a second one.\n",
    "pxc.log_evaluations(\n",
    "    SpanEvaluations(\n",
    "        dataframe=span_bias_classifications,\n",
    "        eval_name=\"Bias Detection\",\n",
    "    ),\n",
    "    DocumentEvaluations(\n",
    "        dataframe=bias_classifications,\n",
    "        eval_name=\"Bias Detection\",\n",
    "    ),\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
